
* ---------------------------------------------------------------------------
* Load the intergenerational link file, attach the dad-side and son-side
* variables, and reshape the result to one row per son and year.
* Requires the global macro "path" to be set by the caller; the data files
* are read from its Data subfolder.
* ---------------------------------------------------------------------------
clear
cd "${path}"
cd Data

* Merge dad's data to intergenerational links.
* -use- accepts -clear-, not -replace-; memory is already empty here, so
* -clear- is simply the valid spelling of the intended option.
use ig_links_nocds_ad, clear
drop mom_id
merge m:1 dad_id using dads_data, keep(match) nogen
* The using file may or may not bring its own mom_id back in; drop it only
* if present so the script does not abort when it is absent.
cap drop mom_id
	cap drop RACEH*
	cap drop RACEW*

* Merge son's age (plus relationship code and household id), keeping every
* linked son even when no match is found in sons_data.
	merge 1:1 son_id using sons_data, keep(match master) nogen keepusing(AGESONHEAD* AGESONWIFE* RELSON* intnr*)

* Reshape long: the wide stubs carry a year suffix that becomes variable "year".
	reshape long AGESONHEAD AGESONWIFE RELSON intnr AGEDADHEAD AGEDADWIFE EDUDAD FUMEMBERSDAD KIDSNUMBERDAD FMONEYDAD FAMWEIGHTDAD SEQNODADS RELDAD  intnrdad, i(son_id) j(year)

* Variable labels for the dad-household fields
label variable RELDAD "Relationship to head of household (dad's household)"
label variable AGEDADHEAD "Age of head of household (dad's household)"
label variable intnrdad "Household id of dad"

* Age cleaning, applied identically to all four reported-age variables:
* values above 900 and values at or below zero are set to missing, then one
* year is subtracted from what remains.
* NOTE(review): the reason for the one-year shift is not stated in this
* file; confirm against the survey documentation.
foreach agevar of varlist AGEDADHEAD AGEDADWIFE AGESONHEAD AGESONWIFE {
	replace `agevar' = . if `agevar' > 900 | `agevar' <= 0
	replace `agevar' = `agevar' - 1
}

* Switch to the <person><role>AGE naming used from here on
rename (AGESONHEAD AGESONWIFE AGEDADHEAD AGEDADWIFE) ///
       (SONHEADAGE SONWIFEAGE DADHEADAGE DADWIFEAGE)

* Pick the age that belongs to the person of interest, based on the
* relationship code: codes 1 and 10 take the head's age, codes 2 and 20 take
* the wife's age, any other code leaves the age missing.
gen SONAGE = cond(inlist(RELSON, 1, 10), SONHEADAGE, ///
             cond(inlist(RELSON, 2, 20), SONWIFEAGE, .))

gen PARENTAGE = cond(inlist(RELDAD, 1, 10), DADHEADAGE, ///
                cond(inlist(RELDAD, 2, 20), DADWIFEAGE, .))
label variable PARENTAGE "Age of parent (dad's household)"

* Birth cohorts: calendar year (year is stored as an offset from 1900) minus
* the cleaned age. COHORTSON comes from SONAGE, COHORTPARENT from PARENTAGE.
cap drop COHORTSON
gen COHORTSON = ((year+1900)-SONAGE)

cap drop COHORTPARENT
gen COHORTPARENT = ((year+1900)-PARENTAGE)

* carryforward is a user-written package from SSC. Install it only when it
* is not already on the adopath, so reruns need no network access and do not
* trip over an existing installation.
cap which carryforward
if _rc {
    ssc install carryforward
}

* Fill gaps in the son's cohort within each son:
*   COHORTSONFW = last non-missing value looking backwards in time
*   COHORTSONBK = next non-missing value looking forwards in time
    sort son_id year
    by son_id (year): carryforward COHORTSON, gen (COHORTSONFW)
    gen int negyear = -year
    sort son_id negyear
    by son_id (negyear): carryforward COHORTSON, gen (COHORTSONBK)
    sort son_id year
    * Use the filled value where both directions agree ...
    replace COHORTSON = COHORTSONFW if mi(COHORTSON) & COHORTSONFW == COHORTSONBK
    
    * ... then fall back to the forward fill, and finally the backward fill.
    * NOTE(review): the forward-fill fallback below already covers the
    * "both agree" case above, so disagreeing neighbours are resolved in
    * favour of the earlier value. Confirm this is intended.
    replace COHORTSON = COHORTSONFW if mi(COHORTSON)
    replace COHORTSON = COHORTSONBK if mi(COHORTSON)

* Son's age implied by the (filled) cohort
gen FSONAGE = ((year + 1900) - COHORTSON)

* Family income in real terms

* CPI-All Urban Consumers (Current Series 2019).
* One deflator per survey year, listed in order from year 67 through 119
* (year is stored as an offset from 1900).
local cpi_by_year ///
	0.3340 0.3480 0.3670                                                  /// 67-69
	0.3880 0.4050 0.4180 0.4440 0.4930 0.5380 0.5690 0.6060 0.6520 0.7260 /// 70-79
	0.8240 0.9090 0.9650 0.9960 1.0390 1.0760 1.0960 1.1360 1.1830 1.2400 /// 80-89
	1.3070 1.3620 1.4030 1.4450 1.4820 1.5240 1.5690 1.6050 1.6300 1.6660 /// 90-99
	1.7220 1.7710 1.7990 1.8400 1.8890 1.9530 2.0160 2.0734 2.1530 2.1454 /// 100-109
	2.1806 2.2494 2.2959 2.3296 2.3674 2.3702 2.4001 2.4512 2.5111 2.5566

* Start from nominal income; years outside the table keep the nominal value
* at this stage.
gen RFMONEYDAD = FMONEYDAD

local yr = 67
foreach deflator of local cpi_by_year {
	replace RFMONEYDAD = FMONEYDAD / `deflator' if year == `yr'
	local ++yr
}

* Rescale everything to the 1967 price level (1967 base year)
replace RFMONEYDAD = RFMONEYDAD * 0.3340

* Cleaned income: keep a value only when
*   (a) real income lies within [100, 150000] (outlier trim), and
*   (b) the dad's relationship code is one of 0, 1, 2, 10, 20, 90.
* Codes 1, 2, 10 and 20 are the head or wife codes used for the age variables.
* NOTE(review): codes 0 and 90 are retained as well; their meaning is not
* visible in this file - confirm against the codebook.
gen RFMONEYDADCLEAN = RFMONEYDAD ///
	if inrange(RFMONEYDAD, 100, 150000) & inlist(RELDAD, 0, 1, 2, 10, 20, 90)

* Log incomes
gen LRFMONEYDADCLEAN = ln(RFMONEYDADCLEAN)

* Son's age in the current year, as implied by his (filled) birth cohort
local son_age "((year + 1900) - COHORTSON)"

* Average log parent income over three-year windows of the son's age.
* The main window is 15-17; 14-16, 16-18 and 17-19 are the alternative
* brackets. For the main window the number of contributing years is stored
* too. Each mean is spread to every row of the son.
foreach lo of numlist 15 14 16 17 {
	local hi = `lo' + 2
	gen TMP`lo'TO`hi' = LRFMONEYDADCLEAN if inrange(`son_age', `lo', `hi')
	egen PINC`lo'TO`hi' = mean(TMP`lo'TO`hi'), by(son_id)
	if `lo' == 15 {
		egen NUMOBSPINC15TO17 = count(TMP15TO17), by(son_id)
	}
}

* Parent age, averaged over exactly the rows that contribute to PINC15TO17
gen TMP = PARENTAGE if !missing(TMP15TO17)
egen PARENTAGE15TO17 = mean(TMP), by(son_id)
drop TMP*

* Household ids (dad's household first, then the son's) when the son is
* 15, 16 and 17, spread to every row of the son.
* NOTE(review): the id is taken through a mean, so it is only a valid id
* when a single row of the son matches a given age - confirm.
foreach src in intnrdad intnr {
	forvalues a = 15/17 {
		gen TMP`a' = `src' if `son_age' == `a'
		egen `src'_s`a' = mean(TMP`a'), by(son_id)
	}
	drop TMP*
}

* Same-household indicator at each age: 1 if the two ids coincide,
* 0 if they differ, missing when the son's household id is missing.
cap drop samehh*
forvalues a = 15/17 {
	gen samehh_s`a' = (intnrdad_s`a' == intnr_s`a') if intnr_s`a' != .
}


* Education: codes 0, 98 and 99 are treated as missing
replace EDUDAD = . if inlist(EDUDAD, 0, 98, 99)

* Education: fill gaps with the most recent earlier value within each son
bysort son_id (year): replace EDUDAD = EDUDAD[_n-1] if missing(EDUDAD)

* Measure parental education as its mean over the years in which the son is
* 15-17; when no such year has a value, fall back to the value in the son's
* last year.
tempvar edu_window
gen `edu_window' = EDUDAD if inrange((year + 1900) - COHORTSON, 15, 17)
egen EDUDADTMP = mean(`edu_window'), by(son_id)
bysort son_id (year): replace EDUDADTMP = EDUDAD[_N] if missing(EDUDADTMP)

* Swap the time-invariant measure in under the original name
drop `edu_window' EDUDAD
rename EDUDADTMP EDUDAD

* Clean up and save the son-by-year panel.
* Only sons with a non-missing average parent income at age 15-17 are kept.
keep son_id year dad_id SEXDAD RELDAD EDUDAD DADHEADAGE DADWIFEAGE SONHEADAGE /// 
	   SONWIFEAGE SONAGE PARENTAGE COHORTSON COHORTPARENT COHORTSONFW COHORTSONBK ///
	   FSONAGE PINC* NUMOBSPINC15TO17 LRFMONEYDADCLEAN PARENTAGE15TO17 ER30001DAD intnrdad* intnr* samehh*
keep if PINC15TO17!=.
save "dads_data_clean.dta", replace

* Save one row per son and parent.
* ER30001DAD is spelled out in full: after the keep above it is the only
* variable the shorter name could refer to, so the short form worked only
* through variable abbreviation and fails under -set varabbrev off-.
keep son_id dad_id SEXDAD EDUDAD PINC* COHORTSON COHORTPARENT NUMOBSPINC15TO17 PARENTAGE15TO17 ER30001DAD intnrdad_* intnr_* samehh*
sort son_id dad_id
* force: the first row of each son-dad pair is kept even if the remaining
* variables differ across that pair's rows.
duplicates drop son_id dad_id , force
save "dads_data_clean_collapse.dta" , replace

